/************************************************************************
* 
*  Licensed to the Apache Software Foundation (ASF) under one
*  or more contributor license agreements.  See the NOTICE file
*  distributed with this work for additional information
*  regarding copyright ownership.  The ASF licenses this file
*  to you under the Apache License, Version 2.0 (the
*  "License"); you may not use this file except in compliance
*  with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*
************************************************************************/
package org.odftoolkit.odfdom.incubator.doc.text;

import org.odftoolkit.odfdom.dom.DefaultElementVisitor;
import org.odftoolkit.odfdom.dom.OdfDocumentNamespace;
import org.odftoolkit.odfdom.dom.element.text.TextHElement;
import org.odftoolkit.odfdom.dom.element.text.TextLineBreakElement;
import org.odftoolkit.odfdom.dom.element.text.TextPElement;
import org.odftoolkit.odfdom.dom.element.text.TextSElement;
import org.odftoolkit.odfdom.dom.element.text.TextTabElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.w3c.dom.Node;

/**
 * It's a sub class of DefaultElementVisitor. It provides a method to get the display text
 * of a single element.
 * <p> If you pass the content root as the parameter, the whole document content will be 
 * returned, without any tag information.</p>
 * <p> It implements part of white space handling fuctions: text:p, text:h, text:s, text:tab, text:linebreak are processed
 * according to ODF specification.</p>
 * 
 * @deprecated As of release 0.8.8, replaced by {@link org.odftoolkit.simple.common.TextExtractor} in Simple API.
 */
public class OdfTextExtractor extends DefaultElementVisitor {

	protected StringBuilder mTextBuilder;
	OdfElement mElement;
	protected static final char NewLineChar = '\r';
	protected static final char TabChar = '\t';

	/**
	 * Default constructor
	 */
	protected OdfTextExtractor() {
	}

	/**
	 * Constructor with an ODF element as paramter
	 * @param element the ODF element whose text would be extracted. 
	 */
	protected OdfTextExtractor(OdfElement element) {
		mTextBuilder = new StringBuilder();
		mElement = element;
	}

	/**
	 * Append the text content of this element to string buffer.
	 * @param ele the ODF element whose text will be appended.
	 */
	protected void appendElementText(OdfElement ele) {
		Node node = ele.getFirstChild();

		while (node != null) {
			if (node.getNodeType() == Node.TEXT_NODE) {
				mTextBuilder.append(node.getNodeValue());
			} else if (node.getNodeType() == Node.ELEMENT_NODE) {
				OdfElement element = (OdfElement) node;
				element.accept(this);
			}
			node = node.getNextSibling();
		}
	}

	/**
	 * An instance of OdfTextExtractor will be created to 
	 * extract the text content of an ODF element.
	 * @param element the ODF element whose text will be extracted.
	 * @return An instance of OdfTextExtractor
	 */
	public static OdfTextExtractor newOdfTextExtractor(OdfElement element) {
		return new OdfTextExtractor(element);
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.pkg.OdfElement)
	 */
	@Override
	public void visit(OdfElement element) {
		if (element.getNamespaceURI().equals(OdfDocumentNamespace.META.getUri())
				|| element.getNamespaceURI().equals(OdfDocumentNamespace.DC.getUri())) {
			mTextBuilder.append(NewLineChar);
		}
		appendElementText(element);
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextPElement)
	 */
	@Override
	public void visit(TextPElement ele) {
		mTextBuilder.append(NewLineChar);
		appendElementText(ele);
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextHElement)
	 */
	@Override
	public void visit(TextHElement ele) {
		mTextBuilder.append(NewLineChar);
		appendElementText(ele);
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextSElement)
	 */
	@Override
	public void visit(TextSElement ele) {
		Integer count = ele.getTextCAttribute();
		if (count == null) {
			count = 1;
		}
		for (int i = 0; i < count; i++) {
			mTextBuilder.append(' ');
		}
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTabElement)
	 */
	@Override
	public void visit(TextTabElement ele) {
		mTextBuilder.append(TabChar);
	}

	/* (non-Javadoc)
	 * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextLineBreakElement)
	 */
	@Override
	public void visit(TextLineBreakElement ele) {
		mTextBuilder.append(NewLineChar);
		appendElementText(ele);
	}

	/**
	 * Return the text content as a string
	 * @return the text content as a string
	 */
	public String getText() {
		visit(mElement);
		return mTextBuilder.toString();
	}
}
